# Libraries
library(tidyverse)
library(readr)
library(kableExtra)
library(bookdown)
library(tinytex)
library(rgdal)
library(broom)
library(sf)
library(gganimate)
library(transformr)
# read data
daily_caloric_supply <- read_csv("Data/daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv")
dietary_compositions <- read_csv("Data/dietary-compositions-by-commodity-group.csv")
overweight_calories <- read_csv("Data/share-of-adult-men-overweight-or-obese-vs-daily-supply-of-calories.csv")
# data wrangling
daily_caloric_supply <- daily_caloric_supply %>%
rename_all(str_remove, pattern = "\\(FAO.+\\)") %>%
select(-Code)
dietary_compositions <- dietary_compositions %>%
rename_all(str_remove, pattern = "\\(FAO.+\\)") %>%
select(-Code)
# data figure1
figure1 <- dietary_compositions %>%
filter(Entity == 'Australia') %>%
pivot_longer(cols = -c(Entity,Year),
names_to = 'Variable',
values_to = 'Value') %>%
ggplot(aes(x = Year,
y = Value,
fill = Variable)) +
geom_area(color = 'white') +
scale_fill_viridis_d() +
labs(y = 'Kilocalories per Person per Day',
title = 'Kilocalories per Person per Day in Australia') +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5),
legend.position = 'bottom',
text = element_text(size = 8)) +
transition_reveal(Year)
figure1
animate(figure1,
res = 300,
width = 2000,
height = 1125,
renderer = gifski_renderer())
Figure 1.1: Kilocalories per Person per Day in Australia
anim_save("figure1.gif")
According to Figure1.1, we can see the different colours represent different food groups and the larger the area, the greater the proportion of the Australian diet. It is clear that they prefer cereals and grains, meat, fats and sugary foods to pulses and starchy roots.
According to the Australian government’s dietary guidelines, these unhealthy diets have led to many Australian adults and about a quarter of children being overweight or obese, so it’s time to make changes for the sake of our health Grech, Rangan, and Allman-Farinelli (2018).
# read world map data
world <- readOGR(dsn = 'World_Countries_(Generalized)/.')
OGR data source with driver: ESRI Shapefile
Source: "/Users/gu1gu1/Desktop/assignment4-group1/World_Countries_(Generalized)", layer: "World_Countries__Generalized_"
with 249 features
It has 7 fields
world_shp <- world %>%
st_as_sf()
world_data <- daily_caloric_supply %>%
select(Entity, Year, `Calories from animal protein `) %>%
merge(world_shp,
by.x = "Entity",
by.y = "COUNTRY") %>%
st_as_sf()
# data figure2
figure2 <- world_data %>%
ggplot(aes(fill = `Calories from animal protein `)) +
geom_sf(colour = NA) +
labs(x = 'Longitude',
y = 'Latitude',
title = ' Year: {closest_state}') +
scale_fill_viridis_c(na.value = 'grey') +
theme_void() +
theme(legend.position = 'bottom') +
transition_states(states = Year)
figure2
animate(figure2,
res = 300,
width = 2000,
height = 1125)
Figure 3.1: Year: {closest_state}
anim_save('figure2.gif')
To better represent the variation in calories provided by animal protein around the world, I first downloaded data from a world map online, then matched it to the dataset I chose by country name, then filled in the colours according to the calories provided by animal protein to create Figure3.1. The closer the color is to green, the more calories from animal protein, and conversely the closer the color is to blue the less there is. Overall, the amount of calories from animal protein has increased worldwide.
Observing the Figure3.1 we can find that people living in North America, Oceania, and Europe consume more animal protein to provide calories, simply put, their diet composition prefers meat products, but also from the side to reflect the continued high consumption of livestock products in almost all developed countries Stoll-Kleemann and O’Riordan (2015).
# data wrangling
by_entity <- overweight_calories %>%
rename('caloric_supply' = 'Daily caloric supply (OWID based on UN FAO & historical sources)',
'Overweight' = 'Overweight or Obese (NCDRisC (2017))') %>%
select(Entity, Year, caloric_supply, Overweight) %>%
filter(Entity %in% c("Australia", "China",
"United States", "United Kingdom",
"South Africa", "Brazil"),
Year >= 2000) %>%
drop_na()
# linear models
by_entity2 <- by_entity %>%
group_by(Entity)%>%
nest()
fit_lm <- function(x){
lm(Overweight~caloric_supply, data = x)
}
mapped_lm <- map(by_entity$data, fit_lm)
scatter_plot <-
ggplot(data = by_entity,
aes(x = caloric_supply,
y = Overweight)) +
geom_point(alpha = 0.4) +
facet_wrap(~Entity, scales = "free") +
geom_smooth(method = "lm")
scatter_plot
entity_model <- by_entity2 %>%
mutate(model = map(data, function(x){
lm(Overweight~caloric_supply, data = x)
})
)
entity_model %>%
mutate(tidy = map(model, tidy))
# A tibble: 6 × 4
# Groups: Entity [6]
Entity data model tidy
<chr> <list> <list> <list>
1 Australia <tibble [15 × 3]> <lm> <tibble [2 × 5]>
2 Brazil <tibble [15 × 3]> <lm> <tibble [2 × 5]>
3 China <tibble [15 × 3]> <lm> <tibble [2 × 5]>
4 South Africa <tibble [15 × 3]> <lm> <tibble [2 × 5]>
5 United Kingdom <tibble [15 × 3]> <lm> <tibble [2 × 5]>
6 United States <tibble [15 × 3]> <lm> <tibble [2 × 5]>
entity_coefs <- entity_model %>%
mutate(tidy = map(model, tidy)) %>%
unnest(tidy) %>%
select(Entity, term, estimate)
tidy_entity_coefs <- entity_coefs %>%
pivot_wider(id_cols = c(Entity),
names_from = term,
values_from = estimate) %>%
rename(Intercept = `(Intercept)`,
Slope = caloric_supply)
entity_glance <- entity_model %>%
mutate(glance = map(model, glance)) %>%
unnest(glance) %>%
select(Entity, r.squared, AIC, BIC)
Table1 = entity_glance
knitr::kable(Table1, booktabs = TRUE, "html",
caption = "Goodness_of_fit_measures") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| Entity | r.squared | AIC | BIC |
|---|---|---|---|
| Australia | 0.9079612 | 40.73470 | 42.85885 |
| Brazil | 0.9232400 | 48.68766 | 50.81181 |
| China | 0.9727043 | 46.42461 | 48.54876 |
| South Africa | 0.7375348 | 66.86584 | 68.98999 |
| United Kingdom | 0.0040535 | 77.89588 | 80.02003 |
| United States | 0.3628869 | 69.38632 | 71.51047 |
The scatter plot shows that the Chinese fitted linear model is the best, while, according to Table5.1, we can find that Chinese linear model has a maximum r.squared value around 0.97 and there are relatively small AIC and BIC values of around 46.42 and 48.55 respectively. Therefore, we can find that Chinese people’s overweight or obese are more affected by calorie intake.
R-squared is the percentage of outcome variable variation explained by the model, and describes how close the data are to the fitted regression. In general, the higher the R-squared value, the better the model fits. AIC and BIC both aim at achieving a compromise between model goodness of fit and model complexity. The preferred models are those with minimum AIC/BIC (Yang and Berdine (2015)).